require(dplyr)
require(data.table)
library(EnvStats)
d1 <- fread("https://biostat.app.vumc.org/wiki/pub/Main/DataSets/nhgh.tsv") %>%
as.data.frame
d1 <- d1 %>%
filter(sex == "female") %>%
filter(age >= 18) %>%
select(gh, ht) %>%
filter(1:n()<=1000)
# Height
xbarh <- mean(d1$ht)
xbarh
## [1] 160.7419
stdevh <- sd(d1$ht)
stdevh
## [1] 7.320161
# Glycohemoglobin
xbarg <- mean(d1$gh)
xbarg
## [1] 5.7246
stdevg <- sd(d1$gh)
stdevg
## [1] 1.052246
hist(d1$ht,
freq = FALSE,
main = "Normal Height MM",
breaks = 100)
curve(dnorm(x,
mean = xbarh,
sd = stdevh),
add = TRUE,
col = "blue",
lwd = 3,
main = "Normal Height MM")
hist(d1$gh,
freq = FALSE,
main = "Normal Glycohemoglobin MM",
breaks = 100)
curve(dnorm(x,
mean = xbarg,
sd = stdevg),
add = TRUE,
col = "red",
lwd = 3,
main = "Normal Glycohemoglobin MM")
plot(ecdf(d1$ht),
main = "Normal Height MM")
curve(pnorm(x,
mean = xbarh,
sd = stdevh),
add = TRUE,
col = "blue",
lwd = 3,
main = "Normal Height MM")
plot(ecdf(d1$gh),
main = "Normal Glycohemoglobin MM")
curve(pnorm(x,
mean = xbarg,
sd = stdevg),
add = TRUE,
col = "red",
lwd = 3,
main = "Normal Glycohemoglobin MM")
q_values <- seq(0.05, 0.95, length = 100)
sampleh <- quantile(d1$ht, q_values)
theoreticalh <- qnorm(q_values, mean = xbarh,
sd = stdevh)
plot(sampleh, theoreticalh, pch = 16, main = "Q-Q Plot Normal Heights (MM)")
abline(0,1, col = "blue", lwd = 2)
sampleg <- quantile(d1$gh, q_values)
theoreticalg <- qnorm(q_values, mean = xbarg,
sd = stdevg)
plot(sampleg, theoreticalg, pch = 16, main = "Q-Q Plot Normal Glycohemoglobin (MM)")
abline(0,1, col = "red", lwd = 2)
M <- 5000
N <- 1000
simh <- rnorm(N*M, mean = xbarh,
sd = stdevh) %>% array(dim = c(M,N))
sample_dist <- apply(simh, 1, median)
hist(sample_dist, breaks = 50, main = "Normal Distribution of Medians \nHeights (MM)", xlab = "Median Height")
abline(v = qnorm(.5, mean = xbarh, sd = stdevh),
lwd = 3, col = "blue")
qnorm(.5, mean = xbarh, sd = stdevh)
## [1] 160.7419
simg <- rnorm(N*M, mean = xbarg,
sd = stdevg) %>% array(dim = c(M,N))
sample_dist <- apply(simg, 1, median)
hist(sample_dist, breaks = 50, main = "Normal Distribution of Medians \nGlycohemoglobin (MM)", xlab = "Median Glycohemoglobin")
abline(v = qnorm(.5, mean = xbarg, sd = stdevg),
lwd = 3, col = "red")
qnorm(.5, mean = xbarg, sd = stdevg)
## [1] 5.7246
# Height
xbarh <- mean(d1$ht)
stdevh <- var(d1$ht)
(shape_hath <- xbarh^2/stdevh)
## [1] 482.1886
(scale_hath <- stdevh/xbarh)
## [1] 0.333359
# Glycohemoglobin
xbarg <- mean(d1$gh)
stdevg <- var(d1$gh)
(shape_hatg <- xbarg^2/stdevg)
## [1] 29.59754
(scale_hatg <- stdevg/xbarg)
## [1] 0.1934147
hist(d1$ht,
freq = FALSE,
main = "Gamma Height MM",
breaks = 75)
curve(dgamma(x, shape = shape_hath,
scale = scale_hath),
add = TRUE,
col = "blue",
lwd = 3,
main = "Gamma Height MM")
hist(d1$gh,
freq = FALSE,
main = "Gamma Glycohemoglobin MM",
breaks = 75)
curve(dgamma(x, shape = shape_hatg,
scale = scale_hatg),
add = TRUE,
col = "red",
lwd = 3,
main = "Gamma Glycohemoglobin MM")
plot(ecdf(d1$ht),
main = "Gamma Height MM")
curve(pgamma(x,
shape = shape_hath,
scale = scale_hath),
add = TRUE,
col = "blue",
lwd = 3,
main = "Gamma Height MM")
plot(ecdf(d1$gh),
main = "Gamma Glycohemoglobin MM")
curve(pgamma(x,
shape = shape_hatg,
scale = scale_hatg),
add = TRUE,
col = "red",
lwd = 3,
main = "Gamma Glycohemoglobin MM")
q_values <- seq(0.01, 0.99, length = 99)
sampleh <- quantile(d1$ht, q_values)
theoreticalh <- qgamma(q_values,
shape = shape_hath,
scale = scale_hath)
plot(sampleh, theoreticalh, pch = 16, main = "Q-Q Plot Gamma Heights (MM)")
abline(0,1, col = "blue", lwd = 2)
sampleg <- quantile(d1$gh, q_values)
theoreticalg <- qgamma(q_values,
shape = shape_hatg,
scale = scale_hatg)
plot(sampleg, theoreticalg, pch = 16, main = "Q-Q Plot Gamma Glycohemoglobin (MM)")
abline(0,1, col = "red", lwd = 2)
M <- 5000
N <- 1000
simh <- rgamma(N*M, scale = scale_hath,
shape = shape_hath) %>% array(dim = c(M,N))
sample_dist <- apply(simh, 1, median)
hist(sample_dist, breaks = 50, main = "Gamma Distribution of Medians \nHeights (MM)", xlab = "Median Height")
abline(v = qgamma(.5, scale = scale_hath, shape = shape_hath),
lwd = 3, col = "blue")
qgamma(.5, shape = shape_hath, scale = scale_hath)
## [1] 160.6308
simg <- rgamma(N*M, scale = scale_hatg, shape = shape_hatg) %>%
array(dim = c(M,N))
sample_dist <- apply(simg, 1, median)
hist(sample_dist, breaks = 50, main = "Gamma Distribution of Medians \nGlycohemoglobin (MM)", xlab = "Median Glycohemoglobin")
abline(v = qgamma(.5, scale = scale_hatg, shape = shape_hatg),
lwd = 3, col = "red")
qgamma(.5, scale = scale_hatg, shape = shape_hatg)
## [1] 5.660259
wht <- eweibull(d1$ht, method = "mme")
wht$parameters[1]
## shape
## 27.47348
wht$parameters[2]
## scale
## 163.9791
wgh <- eweibull(d1$gh, method = "mme")
wgh$parameters[1]
## shape
## 6.356614
wgh$parameters[2]
## scale
## 6.151127
hist(d1$ht, breaks = 100, freq = FALSE, main = "Weibull Height (MM)", xlab = "Height")
curve(dweibull(x, shape = wht$parameters[1],
scale = wht$parameters[2]),
col = "blue",
lwd = 6,
add = TRUE)
hist(d1$gh, breaks = 100, freq = FALSE, main = "Weibull Glycohemoglobin (MM)", xlab = "Glycohemoglobin")
curve(dweibull(x, shape = wgh$parameters[1],
scale = wgh$parameters[2]),
col = "red",
lwd = 6,
add = TRUE)
plot(ecdf(d1$ht),main = "Weibull Height MM", xlab = "Height")
curve(pweibull(
x,
shape = wht$parameters[1],
scale = wht$parameters[2]
),
add = TRUE, col = "blue", lwd = 2)
plot(ecdf(d1$gh),main = "Weibull Glychohemoglobin MM", xlab = "Height")
curve(pweibull(
x,
shape = wgh$parameters[1],
scale = wgh$parameters[2]
),
add = TRUE, col = "red", lwd = 2)
q_values <- seq(0.05, 0.95, length = 100)
sampleh <- quantile(d1$ht, q_values)
theoreticalh <- qweibull(q_values,
shape = wht$parameters[1],
scale = wht$parameters[2]
)
plot(sampleh, theoreticalh, pch = 16, main = "Q-Q Plot Weibull Heights (MM)")
abline(0,1, col = "blue", lwd = 2)
sampleg <- quantile(d1$gh, q_values)
theoreticalg <- qweibull(q_values,
shape = wgh$parameters[1],
scale = wgh$parameters[2]
)
plot(sampleg, theoreticalg, pch = 16, main = "Q-Q Plot Weibull Glycohemoglobin (MM)")
abline(0,1, col = "red", lwd = 2)
M <- 5000
N <- 1000
simh <- rweibull(N*M,
shape = wht$parameters[1],
scale = wht$parameters[2]) %>%
array(dim = c(M,N))
sample_dist <- apply(simh, 1, median)
hist(sample_dist, breaks = 50, main = "Weibull Distribution of Medians \nHeights (MM)", xlab = "Median Height")
abline(v = qweibull(.5,
shape = wht$parameters[1],
scale = wht$parameters[2]),
lwd = 3, col = "blue")
qweibull(.5,
shape = wht$parameters[1],
scale = wht$parameters[2])
## [1] 161.8061
simg <- rweibull(N*M,
shape = wgh$parameters[1],
scale = wgh$parameters[2]) %>%
array(dim = c(M,N))
sample_dist <- apply(simg, 1, median)
hist(sample_dist, breaks = 50, main = "Weibull Distribution of Medians \nGlycohemoglobin (MM)", xlab = "Median Glycohemoglobin")
abline(v = qweibull(.5,
shape = wgh$parameters[1],
scale = wgh$parameters[2]),
lwd = 3, col = "red")
qweibull(.5,
shape = wgh$parameters[1],
scale = wgh$parameters[2])
## [1] 5.806494
nht <- enorm(d1$ht, method = "mle")
nht$parameters[1]
## mean
## 160.7419
nht$parameters[2]
## sd
## 7.3165
ngh <- enorm(d1$gh, method = "mle")
ngh$parameters[1]
## mean
## 5.7246
ngh$parameters[2]
## sd
## 1.05172
hist(d1$ht, breaks = 100, freq = FALSE, main = "Normal Height (MLE)", xlab = "Height")
curve(dnorm(x, mean = nht$parameters[1],
sd = nht$parameters[2]),
col = "blue",
lwd = 6,
add = TRUE)
hist(d1$gh, breaks = 100, freq = FALSE, main = "Normal Glycohemoglobin (MLE)", xlab = "Glycohemoglobin")
curve(dnorm(x, mean = ngh$parameters[1],
sd = ngh$parameters[2]),
col = "red",
lwd = 6,
add = TRUE)
plot(ecdf(d1$ht),main = "Normal Height MLE", xlab = "Height")
curve(pnorm(
x,
mean = nht$parameters[1],
sd = nht$parameters[2]
),
add = TRUE, col = "blue", lwd = 2)
plot(ecdf(d1$gh),main = "Normal Glychohemoglobin MLE", xlab = "Height")
curve(pnorm(
x,
mean = ngh$parameters[1],
sd = ngh$parameters[2]
),
add = TRUE, col = "red", lwd = 2)
q_values <- seq(0.05, 0.95, length = 100)
sampleh <- quantile(d1$ht, q_values)
theoreticalh <- qnorm(q_values,
mean = nht$parameters[1],
sd = nht$parameters[2]
)
plot(sampleh, theoreticalh, pch = 16, main = "Q-Q Plot Normal Heights (MLE)")
abline(0,1, col = "blue", lwd = 2)
sampleg <- quantile(d1$gh, q_values)
theoreticalg <- qnorm(q_values,
mean = ngh$parameters[1],
sd = ngh$parameters[2]
)
plot(sampleg, theoreticalg, pch = 16, main = "Q-Q Plot Normal Glycohemoglobin (MLE)")
abline(0,1, col = "red", lwd = 2)
M <- 5000
N <- 1000
simh <- rnorm(N*M,
mean = nht$parameters[1],
sd = nht$parameters[2]) %>%
array(dim = c(M,N))
sample_dist <- apply(simh, 1, median)
hist(sample_dist, breaks = 50, main = "Normal Distribution of Medians \nHeights (MLE)", xlab = "Median Height")
abline(v = qnorm(.5,
mean = nht$parameters[1],
sd = nht$parameters[2]),
lwd = 3, col = "blue")
qnorm(.5,
mean = nht$parameters[1],
sd = nht$parameters[2])
## [1] 160.7419
simg <- rnorm(N*M,
mean = ngh$parameters[1],
sd = ngh$parameters[2]) %>%
array(dim = c(M,N))
sample_dist <- apply(simg, 1, median)
hist(sample_dist, breaks = 50, main = "Normal Distribution of Medians \nGlycohemoglobin (MLE)", xlab = "Median Glycohemoglobin")
abline(v = qnorm(.5,
mean = ngh$parameters[1],
sd = ngh$parameters[2]),
lwd = 3, col = "red")
qnorm(.5,
mean = ngh$parameters[1],
sd = ngh$parameters[2])
## [1] 5.7246
ght <- egamma(d1$ht, method = "mle")
ght$parameters[1]
## shape
## 482.6712
ght$parameters[2]
## scale
## 0.3330256
ggh <- egamma(d1$gh, method = "mle")
ggh$parameters[1]
## shape
## 40.81761
ggh$parameters[2]
## scale
## 0.1402483
hist(d1$ht, breaks = 100, freq = FALSE, main = "Gamma Height (MLE)", xlab = "Height")
curve(dgamma(x, shape = ght$parameters[1],
scale = ght$parameters[2]),
col = "blue",
lwd = 6,
add = TRUE)
hist(d1$gh, breaks = 100, freq = FALSE, main = "Gamma Glycohemoglobin (MLE)", xlab = "Glycohemoglobin")
curve(dgamma(x, shape = ggh$parameters[1],
scale = ggh$parameters[2]),
col = "red",
lwd = 6,
add = TRUE)
plot(ecdf(d1$ht),main = "Gamma Height MLE", xlab = "Height")
curve(pgamma(
x,
shape = ght$parameters[1],
scale = ght$parameters[2]
),
add = TRUE, col = "blue", lwd = 2)
plot(ecdf(d1$gh),main = "Gamma Glychohemoglobin MLE", xlab = "Height")
curve(pgamma(
x,
shape = ggh$parameters[1],
scale = ggh$parameters[2]
),
add = TRUE, col = "red", lwd = 2)
q_values <- seq(0.05, 0.95, length = 100)
sampleh <- quantile(d1$ht, q_values)
theoreticalh <- qgamma(q_values,
shape = ght$parameters[1],
scale = ght$parameters[2]
)
plot(sampleh, theoreticalh, pch = 16, main = "Q-Q Plot Gamma Heights (MLE)")
abline(0,1, col = "blue", lwd = 2)
sampleg <- quantile(d1$gh, q_values)
theoreticalg <- qgamma(q_values,
shape = ggh$parameters[1],
scale = ggh$parameters[2]
)
plot(sampleg, theoreticalg, pch = 16, main = "Q-Q Plot Gamma Glycohemoglobin (MLE)")
abline(0,1, col = "red", lwd = 2)
M <- 5000
N <- 1000
simh <- rgamma(N*M,
shape = ght$parameters[1],
scale = ght$parameters[2]) %>%
array(dim = c(M,N))
sample_dist <- apply(simh, 1, median)
hist(sample_dist, breaks = 50, main = "Gamma Distribution of Medians \nHeights (MLE)", xlab = "Median Height")
abline(v = qgamma(.5,
shape = ght$parameters[1],
scale = ght$parameters[2]),
lwd = 3, col = "blue")
qgamma(.5,
shape = ght$parameters[1],
scale = ght$parameters[2])
## [1] 160.6309
simg <- rgamma(N*M,
shape = ggh$parameters[1],
scale = ggh$parameters[2]) %>%
array(dim = c(M,N))
sample_dist <- apply(simg, 1, median)
hist(sample_dist, breaks = 50, main = "Gamma Distribution of Medians \nGlycohemoglobin (MLE)", xlab = "Median Glycohemoglobin")
abline(v = qgamma(.5,
shape = ggh$parameters[1],
scale = ggh$parameters[2]),
lwd = 3, col = "red")
qgamma(.5,
shape = ggh$parameters[1],
scale = ggh$parameters[2])
## [1] 5.677919
wht <- eweibull(d1$ht, method = "mle")
wht$parameters[1]
## shape
## 21.85398
wht$parameters[2]
## scale
## 164.2472
wgh <- eweibull(d1$gh, method = "mle")
wgh$parameters[1]
## shape
## 4.125254
wgh$parameters[2]
## scale
## 6.173884
hist(d1$ht, breaks = 100, freq = FALSE, main = "Weibull Height (MLE)", xlab = "Height")
curve(dweibull(x, shape = wht$parameters[1],
scale = wht$parameters[2]),
col = "blue",
lwd = 6,
add = TRUE)
hist(d1$gh, breaks = 100, freq = FALSE, main = "Weibull Glycohemoglobin (MLE)", xlab = "Glycohemoglobin")
curve(dweibull(x, shape = wgh$parameters[1],
scale = wgh$parameters[2]),
col = "red",
lwd = 6,
add = TRUE)
plot(ecdf(d1$ht),main = "Weibull Height MLE", xlab = "Height")
curve(pweibull(
x,
shape = wht$parameters[1],
scale = wht$parameters[2]
),
add = TRUE, col = "blue", lwd = 2)
plot(ecdf(d1$gh),main = "Weibull Glychohemoglobin MLE", xlab = "Height")
curve(pweibull(
x,
shape = wgh$parameters[1],
scale = wgh$parameters[2]
),
add = TRUE, col = "red", lwd = 2)
q_values <- seq(0.05, 0.95, length = 100)
sampleh <- quantile(d1$ht, q_values)
theoreticalh <- qweibull(q_values,
shape = wht$parameters[1],
scale = wht$parameters[2]
)
plot(sampleh, theoreticalh, pch = 16, main = "Q-Q Plot Weibull Heights (MLE)")
abline(0,1, col = "blue", lwd = 2)
sampleg <- quantile(d1$gh, q_values)
theoreticalg <- qweibull(q_values,
shape = wgh$parameters[1],
scale = wgh$parameters[2]
)
plot(sampleg, theoreticalg, pch = 16, main = "Q-Q Plot Weibull Glycohemoglobin (MLE)")
abline(0,1, col = "red", lwd = 2)
M <- 5000
N <- 1000
simh <- rweibull(N*M,
shape = wht$parameters[1],
scale = wht$parameters[2]) %>%
array(dim = c(M,N))
sample_dist <- apply(simh, 1, median)
hist(sample_dist, breaks = 50, main = "Weibull Distribution of Medians \nHeights (MLE)", xlab = "Median Height")
abline(v = qweibull(.5,
shape = wht$parameters[1],
scale = wht$parameters[2]),
lwd = 3, col = "blue")
qweibull(.5,
shape = wht$parameters[1],
scale = wht$parameters[2])
## [1] 161.5156
simg <- rweibull(N*M,
shape = wgh$parameters[1],
scale = wgh$parameters[2]) %>%
array(dim = c(M,N))
sample_dist <- apply(simg, 1, median)
hist(sample_dist, breaks = 50, main = "Weibull Distribution of Medians \nGlycohemoglobin (MLE)", xlab = "Median Glycohemoglobin")
abline(v = qweibull(.5,
shape = wgh$parameters[1],
scale = wgh$parameters[2]),
lwd = 3, col = "red")
qweibull(.5,
shape = wgh$parameters[1],
scale = wgh$parameters[2])
## [1] 5.64902